{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-Lf43iNoX2dt"
      },
      "source": [
        "# Cleaning data for public release"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "8SDweS3wPMCD"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "from collections import Counter\n",
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rwnAQOekYp06",
        "outputId": "ad8b0f1f-9d05-4eb7-9d98-536f855f55f5"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
          ]
        }
      ],
      "source": [
        "# from google.colab import drive\n",
        "# drive.mount('/content/drive')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rLQJxVp5PWDP"
      },
      "outputs": [],
      "source": [
        "# data_dir = '/content/drive/MyDrive/char_gender/data/'  # alternative location on Google Drive\n",
        "data_dir = '../data/'\n",
        "# exist_ok=True turns this into a no-op when the folder is already present,\n",
        "# so no separate os.path.exists() check is required.\n",
        "os.makedirs(data_dir, exist_ok=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "cPjuw4AsQlAG",
        "outputId": "03500f47-a185-4974-b752-888ee5a94adc"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "3508\n"
          ]
        }
      ],
      "source": [
        "# The raw export is not released because it contains annotators' Prolific IDs.\n",
        "raw_csv = os.path.join(data_dir, 'char_gender_final.csv')\n",
        "\n",
        "# Short names for the columns that the rest of the notebook refers to.\n",
        "short_names = {\n",
        "    'Recorded Date': 'date',\n",
        "    'First Story': 'first_story',\n",
        "    'First Gender': 'first_gender',\n",
        "    'Your gender - Selected Choice': 'gender',\n",
        "    'Suppose you had both novels in your hand. Which one would you continue reading?': 'chosen_story',\n",
        "}\n",
        "\n",
        "# header=1 takes the column names from the second line of the file;\n",
        "# iloc[1:] then discards the first row that follows that header line.\n",
        "df = pd.read_csv(raw_csv, header=1).iloc[1:].rename(columns=short_names)\n",
        "print(len(df))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RIM5WSKBNA_v",
        "outputId": "3895eb09-5329-4e4b-8f42-944743a8a5af"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "3002\n"
          ]
        }
      ],
      "source": [
        "def after_pilot(date):\n",
        "  \"\"\"Return True for a May 2024 date string whose day of month is greater than 16.\"\"\"\n",
        "  return '2024-05' in date and int(date.split('-')[2][:2]) > 16\n",
        "\n",
        "# All four comprehension answers are correct, with the hike (Sam) story as Novel A\n",
        "# and the cafe (Alex) story as Novel B. Unanswered questions (NaN) compare as False.\n",
        "cond_hike = (\n",
        "    df['In Novel A, how long is the trail that Sam takes?'].eq('6 miles')\n",
        "    & df['In Novel A, what plant does Sam rest against?'].eq('Juniper tree')\n",
        "    & df['In Novel B, when does Alex go to the cafe?'].eq('Morning')\n",
        "    & df['In Novel B, what is the material of the chair Alex sketches?'].eq('Wood')\n",
        ")\n",
        "\n",
        "# Same check for the opposite ordering: cafe story as Novel A, hike story as Novel B.\n",
        "cond_coffee = (\n",
        "    df['In Novel B, how long is the trail that Sam takes?'].eq('6 miles')\n",
        "    & df['In Novel B, what plant does Sam rest against?'].eq('Juniper tree')\n",
        "    & df['In Novel A, when does Alex go to the cafe?'].eq('Morning')\n",
        "    & df['In Novel A, what is the material of the chair Alex sketches?'].eq('Wood')\n",
        ")\n",
        "\n",
        "# remove data from pilot\n",
        "recent = df['date'].map(after_pilot).astype(bool)\n",
        "\n",
        "# only keep complete surveys\n",
        "complete = df['chosen_story'].notna() & df['gender'].notna()\n",
        "\n",
        "# only keep participants who passed the reading comprehension check\n",
        "correct = cond_hike | cond_coffee\n",
        "\n",
        "# Response IDs of the rows that satisfy all three criteria, in file order.\n",
        "rids_awesome = df.loc[recent & complete & correct, 'Response ID'].tolist()\n",
        "\n",
        "print(len(rids_awesome))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "ICJy9DZgcGsU"
      },
      "outputs": [],
      "source": [
        "# Cap the sample at the first 3000 qualifying responses (in file order).\n",
        "# NOTE(review): 3000 is presumably the planned sample size -- confirm.\n",
        "max_responses = 3000\n",
        "kept_rids = rids_awesome[:max_responses]\n",
        "awesome = df.set_index('Response ID').loc[kept_rids]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Pd8wucYEbRwm",
        "outputId": "47f55f53-23c1-412f-acb8-f71e592f22ff"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "17"
            ]
          },
          "execution_count": 7,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Keep only participants who selected 'Woman' or 'Man' as their gender;\n",
        "# `enby` counts every participant with any other gender response.\n",
        "binary_mask = awesome['gender'].isin(['Woman', 'Man'])\n",
        "rids_clean = awesome.index[binary_mask.to_numpy()].tolist()\n",
        "enby = int((~binary_mask).sum())\n",
        "enby"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "d6Lvt1V7N_-S",
        "outputId": "5b8f1090-eabe-4beb-d98a-5def96c9f829"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "2983"
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Re-select the retained responses from the full table, keyed by Response ID.\n",
        "by_response_id = df.set_index('Response ID')\n",
        "clean = by_response_id.loc[rids_clean]\n",
        "len(clean)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "id": "iy_rVEskgMNa"
      },
      "outputs": [],
      "source": [
        "hike_first = clean['first_story'].eq('Hike')\n",
        "coffee_first = clean['first_story'].eq('Coffee')\n",
        "\n",
        "# The hike story was picked when it came first and 'Novel A' was chosen,\n",
        "# or when the coffee story came first and 'Novel B' was chosen.\n",
        "picked_hike = (hike_first & clean['chosen_story'].eq('Novel A')) | (coffee_first & clean['chosen_story'].eq('Novel B'))\n",
        "\n",
        "# Hike story first with first_gender 'W', or coffee story first with first_gender 'M'.\n",
        "woman_hike = (clean['first_gender'].eq('W') & hike_first) | (clean['first_gender'].eq('M') & coffee_first)\n",
        "\n",
        "chose_hike = picked_hike.astype(int).tolist() # 1 in cases where the participant chose the hike story, else 0\n",
        "treatmentA = woman_hike.map({True: 'Hike', False: 'Other'}).tolist() # 'Hike' in cases where the hike story has woman protagonist, else 'Other'\n",
        "\n",
        "clean['Treatment A'] = treatmentA\n",
        "clean['chose_hike'] = chose_hike"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "ZfIL-FzFj18c"
      },
      "outputs": [],
      "source": [
        "# 1 when the respondent's gender is 'Woman', 0 for every other value.\n",
        "clean['respondent_woman'] = clean['gender'].eq('Woman').astype(int)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "XcmnWax05sXR",
        "outputId": "9c912c30-d295-4e35-f350-85e65f6bd457"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "1492 1491\n"
          ]
        }
      ],
      "source": [
        "# Tally respondents by gender: W counts 'Woman' rows, M counts all remaining rows.\n",
        "W = int(clean['gender'].eq('Woman').sum())\n",
        "M = len(clean) - W\n",
        "print(W, M)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "id": "YwDBHUJga1k-"
      },
      "outputs": [],
      "source": [
        "# Drop the first 16 columns of the export, then the columns holding Prolific IDs\n",
        "# and a column named 'boh' that was created by mistake.\n",
        "# NOTE(review): the 16 leading columns are presumably survey metadata -- confirm,\n",
        "# since a positional cut silently shifts if the export layout changes.\n",
        "released_columns = clean.columns[16:]\n",
        "unreleased = [\n",
        "    'PROLIFIC_PID',\n",
        "    'What is your Prolific ID?\\n\\nPlease note that this response should auto-fill with the correct ID.',\n",
        "    'boh',\n",
        "]\n",
        "public = clean[released_columns].drop(columns=unreleased)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 892
        },
        "id": "V_W24vwZXB6O",
        "outputId": "982440ab-52f1-4541-fcf4-59ce3f3e62c7"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "public"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-dd7481f9-24de-4801-863b-3b4f74109a7b\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>If you agree to these conditions, please click \"I consent to participate\" below. If you do not agree, click the “I do not consent to participate” option.\\n\\nBy agreeing to participate, you confirm that you are over 18 years of age.\\n\\n \\n\\nI have read the above information.</th>\n",
              "      <th>In Novel A, how long is the trail that Sam takes?</th>\n",
              "      <th>In Novel A, what plant does Sam rest against?</th>\n",
              "      <th>In Novel B, when does Alex go to the cafe?</th>\n",
              "      <th>In Novel B, what is the material of the chair Alex sketches?</th>\n",
              "      <th>chosen_story</th>\n",
              "      <th>Your opinion is fundamental to understand reader preferences. Please take your time to answer this question.\\n\\nIn your own words, briefly share your motivation for your choice (minimum 200 characters, or about 40 words).</th>\n",
              "      <th>Your age:</th>\n",
              "      <th>gender</th>\n",
              "      <th>Your gender - Prefer to self describe: - Text</th>\n",
              "      <th>...</th>\n",
              "      <th>How would you describe your political views?</th>\n",
              "      <th>In Novel A, when does Alex go to the cafe?</th>\n",
              "      <th>In Novel A, what is the material of the chair Alex sketches?</th>\n",
              "      <th>In Novel B, how long is the trail that Sam takes?</th>\n",
              "      <th>In Novel B, what plant does Sam rest against?</th>\n",
              "      <th>first_gender</th>\n",
              "      <th>first_story</th>\n",
              "      <th>Treatment A</th>\n",
              "      <th>chose_hike</th>\n",
              "      <th>respondent_woman</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Response ID</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>R_7rxJeX0686uzPHi</th>\n",
              "      <td>I consent to participate.</td>\n",
              "      <td>6 miles</td>\n",
              "      <td>Juniper tree</td>\n",
              "      <td>Morning</td>\n",
              "      <td>Wood</td>\n",
              "      <td>Novel A</td>\n",
              "      <td>Novel A has more information about a potential...</td>\n",
              "      <td>25-35</td>\n",
              "      <td>Man</td>\n",
              "      <td>NaN</td>\n",
              "      <td>...</td>\n",
              "      <td>Moderate</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>W</td>\n",
              "      <td>Hike</td>\n",
              "      <td>Hike</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>R_1Mng04p43Zl3xvb</th>\n",
              "      <td>I consent to participate.</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Novel B</td>\n",
              "      <td>Novel B drew me in with the suspense of exactl...</td>\n",
              "      <td>36-45</td>\n",
              "      <td>Man</td>\n",
              "      <td>NaN</td>\n",
              "      <td>...</td>\n",
              "      <td>Moderate</td>\n",
              "      <td>Morning</td>\n",
              "      <td>Wood</td>\n",
              "      <td>6 miles</td>\n",
              "      <td>Juniper tree</td>\n",
              "      <td>M</td>\n",
              "      <td>Coffee</td>\n",
              "      <td>Hike</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>R_1IHhRxaGPtHjhQ2</th>\n",
              "      <td>I consent to participate.</td>\n",
              "      <td>6 miles</td>\n",
              "      <td>Juniper tree</td>\n",
              "      <td>Morning</td>\n",
              "      <td>Wood</td>\n",
              "      <td>Novel A</td>\n",
              "      <td>I prefer to read novel A because it was easier...</td>\n",
              "      <td>25-35</td>\n",
              "      <td>Woman</td>\n",
              "      <td>NaN</td>\n",
              "      <td>...</td>\n",
              "      <td>Very liberal</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>W</td>\n",
              "      <td>Hike</td>\n",
              "      <td>Hike</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>R_1oFSEE8V2SpwrhD</th>\n",
              "      <td>I consent to participate.</td>\n",
              "      <td>6 miles</td>\n",
              "      <td>Juniper tree</td>\n",
              "      <td>Morning</td>\n",
              "      <td>Wood</td>\n",
              "      <td>Novel B</td>\n",
              "      <td>I'm also interested in art and enjoy a main ch...</td>\n",
              "      <td>18-24</td>\n",
              "      <td>Woman</td>\n",
              "      <td>NaN</td>\n",
              "      <td>...</td>\n",
              "      <td>Moderate</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>W</td>\n",
              "      <td>Hike</td>\n",
              "      <td>Hike</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>R_1jVUjK5fbarEH9T</th>\n",
              "      <td>I consent to participate.</td>\n",
              "      <td>6 miles</td>\n",
              "      <td>Juniper tree</td>\n",
              "      <td>Morning</td>\n",
              "      <td>Wood</td>\n",
              "      <td>Novel A</td>\n",
              "      <td>My curiosity about the contents of the note/le...</td>\n",
              "      <td>36-45</td>\n",
              "      <td>Woman</td>\n",
              "      <td>NaN</td>\n",
              "      <td>...</td>\n",
              "      <td>Liberal</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>W</td>\n",
              "      <td>Hike</td>\n",
              "      <td>Hike</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 24 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-dd7481f9-24de-4801-863b-3b4f74109a7b')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-dd7481f9-24de-4801-863b-3b4f74109a7b button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-dd7481f9-24de-4801-863b-3b4f74109a7b');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-5da5a6f6-6e76-405d-bf77-4c284e76d02a\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-5da5a6f6-6e76-405d-bf77-4c284e76d02a')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-5da5a6f6-6e76-405d-bf77-4c284e76d02a button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                  If you agree to these conditions, please click \"I consent to participate\" below. If you do not agree, click the “I do not consent to participate” option.\\n\\nBy agreeing to participate, you confirm that you are over 18 years of age.\\n\\n \\n\\nI have read the above information.  \\\n",
              "Response ID                                                                                                                                                                                                                                                                                            \n",
              "R_7rxJeX0686uzPHi                          I consent to participate.                                                                                                                                                                                                                                   \n",
              "R_1Mng04p43Zl3xvb                          I consent to participate.                                                                                                                                                                                                                                   \n",
              "R_1IHhRxaGPtHjhQ2                          I consent to participate.                                                                                                                                                                                                                                   \n",
              "R_1oFSEE8V2SpwrhD                          I consent to participate.                                                                                                                                                                                                                                   \n",
              "R_1jVUjK5fbarEH9T                          I consent to participate.                                                                                                                                                                                                                                   \n",
              "\n",
              "                  In Novel A, how long is the trail that Sam takes?  \\\n",
              "Response ID                                                           \n",
              "R_7rxJeX0686uzPHi                                           6 miles   \n",
              "R_1Mng04p43Zl3xvb                                               NaN   \n",
              "R_1IHhRxaGPtHjhQ2                                           6 miles   \n",
              "R_1oFSEE8V2SpwrhD                                           6 miles   \n",
              "R_1jVUjK5fbarEH9T                                           6 miles   \n",
              "\n",
              "                  In Novel A, what plant does Sam rest against?  \\\n",
              "Response ID                                                       \n",
              "R_7rxJeX0686uzPHi                                  Juniper tree   \n",
              "R_1Mng04p43Zl3xvb                                           NaN   \n",
              "R_1IHhRxaGPtHjhQ2                                  Juniper tree   \n",
              "R_1oFSEE8V2SpwrhD                                  Juniper tree   \n",
              "R_1jVUjK5fbarEH9T                                  Juniper tree   \n",
              "\n",
              "                  In Novel B, when does Alex go to the cafe?  \\\n",
              "Response ID                                                    \n",
              "R_7rxJeX0686uzPHi                                    Morning   \n",
              "R_1Mng04p43Zl3xvb                                        NaN   \n",
              "R_1IHhRxaGPtHjhQ2                                    Morning   \n",
              "R_1oFSEE8V2SpwrhD                                    Morning   \n",
              "R_1jVUjK5fbarEH9T                                    Morning   \n",
              "\n",
              "                  In Novel B, what is the material of the chair Alex sketches?  \\\n",
              "Response ID                                                                      \n",
              "R_7rxJeX0686uzPHi                                               Wood             \n",
              "R_1Mng04p43Zl3xvb                                                NaN             \n",
              "R_1IHhRxaGPtHjhQ2                                               Wood             \n",
              "R_1oFSEE8V2SpwrhD                                               Wood             \n",
              "R_1jVUjK5fbarEH9T                                               Wood             \n",
              "\n",
              "                  chosen_story  \\\n",
              "Response ID                      \n",
              "R_7rxJeX0686uzPHi      Novel A   \n",
              "R_1Mng04p43Zl3xvb      Novel B   \n",
              "R_1IHhRxaGPtHjhQ2      Novel A   \n",
              "R_1oFSEE8V2SpwrhD      Novel B   \n",
              "R_1jVUjK5fbarEH9T      Novel A   \n",
              "\n",
              "                  Your opinion is fundamental to understand reader preferences. Please take your time to answer this question.\\n\\nIn your own words, briefly share your motivation for your choice (minimum 200 characters, or about 40 words).  \\\n",
              "Response ID                                                                                                                                                                                                                                       \n",
              "R_7rxJeX0686uzPHi  Novel A has more information about a potential...                                                                                                                                                                              \n",
              "R_1Mng04p43Zl3xvb  Novel B drew me in with the suspense of exactl...                                                                                                                                                                              \n",
              "R_1IHhRxaGPtHjhQ2  I prefer to read novel A because it was easier...                                                                                                                                                                              \n",
              "R_1oFSEE8V2SpwrhD  I'm also interested in art and enjoy a main ch...                                                                                                                                                                              \n",
              "R_1jVUjK5fbarEH9T  My curiosity about the contents of the note/le...                                                                                                                                                                              \n",
              "\n",
              "                  Your age: gender  \\\n",
              "Response ID                          \n",
              "R_7rxJeX0686uzPHi     25-35    Man   \n",
              "R_1Mng04p43Zl3xvb     36-45    Man   \n",
              "R_1IHhRxaGPtHjhQ2     25-35  Woman   \n",
              "R_1oFSEE8V2SpwrhD     18-24  Woman   \n",
              "R_1jVUjK5fbarEH9T     36-45  Woman   \n",
              "\n",
              "                  Your gender - Prefer to self describe: - Text  ...  \\\n",
              "Response ID                                                      ...   \n",
              "R_7rxJeX0686uzPHi                                           NaN  ...   \n",
              "R_1Mng04p43Zl3xvb                                           NaN  ...   \n",
              "R_1IHhRxaGPtHjhQ2                                           NaN  ...   \n",
              "R_1oFSEE8V2SpwrhD                                           NaN  ...   \n",
              "R_1jVUjK5fbarEH9T                                           NaN  ...   \n",
              "\n",
              "                  How would you describe your political views?  \\\n",
              "Response ID                                                      \n",
              "R_7rxJeX0686uzPHi                                     Moderate   \n",
              "R_1Mng04p43Zl3xvb                                     Moderate   \n",
              "R_1IHhRxaGPtHjhQ2                                 Very liberal   \n",
              "R_1oFSEE8V2SpwrhD                                     Moderate   \n",
              "R_1jVUjK5fbarEH9T                                      Liberal   \n",
              "\n",
              "                  In Novel A, when does Alex go to the cafe?  \\\n",
              "Response ID                                                    \n",
              "R_7rxJeX0686uzPHi                                        NaN   \n",
              "R_1Mng04p43Zl3xvb                                    Morning   \n",
              "R_1IHhRxaGPtHjhQ2                                        NaN   \n",
              "R_1oFSEE8V2SpwrhD                                        NaN   \n",
              "R_1jVUjK5fbarEH9T                                        NaN   \n",
              "\n",
              "                  In Novel A, what is the material of the chair Alex sketches?  \\\n",
              "Response ID                                                                      \n",
              "R_7rxJeX0686uzPHi                                                NaN             \n",
              "R_1Mng04p43Zl3xvb                                               Wood             \n",
              "R_1IHhRxaGPtHjhQ2                                                NaN             \n",
              "R_1oFSEE8V2SpwrhD                                                NaN             \n",
              "R_1jVUjK5fbarEH9T                                                NaN             \n",
              "\n",
              "                  In Novel B, how long is the trail that Sam takes?  \\\n",
              "Response ID                                                           \n",
              "R_7rxJeX0686uzPHi                                               NaN   \n",
              "R_1Mng04p43Zl3xvb                                           6 miles   \n",
              "R_1IHhRxaGPtHjhQ2                                               NaN   \n",
              "R_1oFSEE8V2SpwrhD                                               NaN   \n",
              "R_1jVUjK5fbarEH9T                                               NaN   \n",
              "\n",
              "                  In Novel B, what plant does Sam rest against? first_gender  \\\n",
              "Response ID                                                                    \n",
              "R_7rxJeX0686uzPHi                                           NaN            W   \n",
              "R_1Mng04p43Zl3xvb                                  Juniper tree            M   \n",
              "R_1IHhRxaGPtHjhQ2                                           NaN            W   \n",
              "R_1oFSEE8V2SpwrhD                                           NaN            W   \n",
              "R_1jVUjK5fbarEH9T                                           NaN            W   \n",
              "\n",
              "                  first_story Treatment A chose_hike respondent_woman  \n",
              "Response ID                                                            \n",
              "R_7rxJeX0686uzPHi        Hike        Hike          1                0  \n",
              "R_1Mng04p43Zl3xvb      Coffee        Hike          1                0  \n",
              "R_1IHhRxaGPtHjhQ2        Hike        Hike          1                1  \n",
              "R_1oFSEE8V2SpwrhD        Hike        Hike          0                1  \n",
              "R_1jVUjK5fbarEH9T        Hike        Hike          1                1  \n",
              "\n",
              "[5 rows x 24 columns]"
            ]
          },
          "execution_count": 13,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "public.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "SMTovWqrX6fB"
      },
      "outputs": [],
      "source": [
        "public.to_csv(os.path.join(data_dir, 'public_data.csv'))"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
